/*==============================================================================
FILE NAME: Figure_2.do
CREATED: 12 June 2025
==============================================================================*/

**Figure 2

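/* Set directory if working independently through code
   (defines the global paths used by this replication package)
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data" 
	global figures "$rootdir/output/figures"  // Define global paths for replication package
	global point_estimates "" // insert path for the exported point-estimate CSVs; $point_estimates is used by the export commands below
} 
*/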
set scheme modern

// Complaints by Year (Fig 2.A)
* Builds total complaints per year for 2003-2019, exports those totals as the
* Panel A point-estimate CSV, and draws the corresponding bar chart.
use "$processed_data/incidents.dta", clear

* Rename complaint ID variable
rename ComplaintIncident CIN

* Keep only ID and date variables, then drop duplicate complaint records
keep CIN IncidentRecDate
duplicates drop

* Create indicator for incident (summed below to count complaints)
gen incident=1

* Convert the string date (month/day/year order) to a Stata daily date
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate
rename temp IncidentRecDate
format IncidentRecDate %td

* Drop invalid dates (negative daily dates fall before 01jan1960)
replace IncidentRecDate = . if IncidentRecDate < 0

* Extract year from date
gen year=year(IncidentRecDate)

* Keep only years 2003-2019 (observations with a missing year are dropped too)
keep if inrange(year, 2003, 2019)

* Collapse to total complaints per year so the exported CSV contains exactly
* the values plotted in the figure (one row per year)
collapse (sum) incident, by(year)
sort year

* Export data for replication
export delimited "$point_estimates/Point_Estimates_Figure_2_Panel_A.csv", replace

* Plot number of complaints per year
* (no graph title is drawn; the panel would be titled "Total Complaints by Year")
graph set window fontface "Times New Roman"
graph bar (sum) incident, over(year, label(angle(45) labsize(huge))) ///
    ytitle("# of Complaints", size(huge)) ///
    ylabel(, labsize(huge)) ///
    xsize(8.6) ///
    bar(1, color(black)) ///
    graphregion(color(white)) ///
    plotregion(color(white))

* Save figure
graph export "$figures/Figure_2_Panel_A.pdf", replace


// Complaints by Month (Fig 2.B)
* Builds the average number of complaints per calendar month across the years
* 2003-2019, exports those averages, and draws the Panel B bar chart.
use "$processed_data/incidents.dta", clear

* Rename complaint ID variable
rename ComplaintIncident CIN

* Keep only ID and date variables, then drop duplicate complaint records
keep CIN IncidentRecDate
duplicates drop

* Create indicator for incident (summed below to count complaints)
gen incident=1

* Convert and format dates (string in month/day/year order -> Stata daily date)
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate
rename temp IncidentRecDate
format IncidentRecDate %td

* Drop invalid dates (negative daily dates fall before 01jan1960)
replace IncidentRecDate = . if IncidentRecDate < 0

* Keep only years 2003-2019 (observations with a missing year are dropped too)
gen year=year(IncidentRecDate)
keep if inrange(year, 2003, 2019)

* Extract calendar month (1-12)
gen month=month(IncidentRecDate)

* Collapse to total complaints by month/year, then average those totals
* across years within each calendar month.
* NOTE(review): a month/year cell with no complaints produces no row in the
* first collapse and therefore does not enter the average as a zero - confirm
* that every month/year in 2003-2019 has at least one complaint.
collapse (sum) incident, by(month year)
collapse (mean) incident, by(month)

* Label months
label define month 1 "January" 2 "February" 3 "March" 4 "April" 5 "May" 6 "June" ///
    7 "July" 8 "August" 9 "September" 10 "October" 11 "November" 12 "December"
label values month month

* Export data
export delimited "$point_estimates/Point_Estimates_Figure_2_Panel_B.csv", replace

* Plot average complaints by calendar month
* (no graph title is drawn; the panel would be titled
*  "Average Annual Complaints by Calendar Month (2003-2019)")
* The data hold one row per month, so (mean) plots each stored average as is.
graph set window fontface "Times New Roman"
graph bar (mean) incident, over(month, label(angle(45) labsize(huge))) ///
    ytitle("# of Complaints", size(huge)) ///
    ylabel(, labsize(huge)) ///
    xsize(8.6) ///
    bar(1, color(black)) ///
    graphregion(color(white)) ///
    plotregion(color(white))

* Save figure
graph export "$figures/Figure_2_Panel_B.pdf", replace


// Complaints by Day of Week (Fig 2.C)
* Builds the average number of complaints per day of week across the years
* 2003-2019, exports those averages, and draws the Panel C bar chart.
use "$processed_data/incidents.dta", clear

* Rename complaint ID variable
rename ComplaintIncident CIN

* Keep only ID and date variables, then drop duplicate complaint records
keep CIN IncidentRecDate
duplicates drop

* Create indicator for incident (summed below to count complaints)
gen incident=1

* Convert and format dates (string in month/day/year order -> Stata daily date)
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate
rename temp IncidentRecDate
format IncidentRecDate %td

* Drop invalid dates (negative daily dates fall before 01jan1960)
replace IncidentRecDate = . if IncidentRecDate < 0

* Keep only years 2003-2019 (observations with a missing year are dropped too)
gen year=year(IncidentRecDate)
keep if inrange(year, 2003, 2019)

* Extract day of week (0=Sunday,...,6=Saturday)
gen dow=dow(IncidentRecDate)

* Collapse to total by DOW/year, then average across years
collapse (sum) incident, by(dow year)
collapse (mean) incident, by(dow)
sort dow

* Label days of the week
label define dow 0 "Sunday" 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday"
label values dow dow

* Export data
export delimited "$point_estimates/Point_Estimates_Figure_2_Panel_C.csv", replace

* Plot average complaints by day of week
* (no graph title is drawn; the panel would be titled
*  "Average Annual Complaints by Day of Week (2003-2019)")
* The data hold one row per day of week, so (mean) plots each stored average as is.
graph set window fontface "Times New Roman"
graph bar (mean) incident, over(dow, label(angle(45) labsize(huge))) ///
    ytitle("# of Complaints", size(huge)) ///
    ylabel(, labsize(huge)) ///
    xsize(8.6) ///
    bar(1, color(black)) ///
    graphregion(color(white)) ///
    plotregion(color(white))

* Save figure
graph export "$figures/Figure_2_Panel_C.pdf", replace
